1 Setup

First, we set up our workspace. Load maftools to analyze the maf files.

Get all the mafs and merge them into one big maf.

library(maftools)
library(tidyverse)
library(synapser)
synLogin()
## Welcome, Robert Allaway!
## NULL
child <- synGetChildren('syn35817218')$asList()
ids <- map(child, "id") %>% unlist()

maf_paths <- lapply(ids, synGet)
## 
Downloading  [##########----------]47.90%   8.0MB/16.7MB (2.6MB/s) Strelka_28cNF.vep.maf.synapse_download_102758853     
Downloading  [##########----------]52.10%   8.7MB/16.7MB (2.8MB/s) Strelka_28cNF.vep.maf.synapse_download_102758853     
Downloading  [####################]100.00%   16.7MB/16.7MB (5.4MB/s) Strelka_28cNF.vep.maf.synapse_download_102758853 Done...    
Downloading  [###########---------]53.44%   8.0MB/15.0MB (2.3MB/s) Strelka_cNF00.10a.vep.maf.synapse_download_102758854     
Downloading  [####################]100.00%   15.0MB/15.0MB (4.3MB/s) Strelka_cNF00.10a.vep.maf.synapse_download_102758854 Done...    
Downloading  [##########----------]50.89%   8.0MB/15.7MB (2.0MB/s) Strelka_cNF04.9a.vep.maf.synapse_download_102758856     
Downloading  [####################]100.00%   15.7MB/15.7MB (3.9MB/s) Strelka_cNF04.9a.vep.maf.synapse_download_102758856 Done...    
Downloading  [###-----------------]13.82%   2.6MB/18.6MB (443.6kB/s) Strelka_cNF97.2a.vep.maf.synapse_download_102758857     
Downloading  [###########---------]56.91%   10.6MB/18.6MB (1.8MB/s) Strelka_cNF97.2a.vep.maf.synapse_download_102758857     
Downloading  [####################]100.00%   18.6MB/18.6MB (3.1MB/s) Strelka_cNF97.2a.vep.maf.synapse_download_102758857 Done...    
Downloading  [#########-----------]47.26%   8.0MB/16.9MB (1.6MB/s) Strelka_cNF97.2b.vep.maf.synapse_download_102758861     
Downloading  [###################-]94.53%   16.0MB/16.9MB (3.3MB/s) Strelka_cNF97.2b.vep.maf.synapse_download_102758861     
Downloading  [####################]100.00%   16.9MB/16.9MB (3.5MB/s) Strelka_cNF97.2b.vep.maf.synapse_download_102758861 Done...    
Downloading  [#########-----------]46.91%   8.0MB/17.1MB (2.2MB/s) Strelka_cNF98.4c.vep.maf.synapse_download_102758862     
Downloading  [###################-]93.81%   16.0MB/17.1MB (4.3MB/s) Strelka_cNF98.4c.vep.maf.synapse_download_102758862     
Downloading  [####################]100.00%   17.1MB/17.1MB (4.6MB/s) Strelka_cNF98.4c.vep.maf.synapse_download_102758862 Done...    
Downloading  [########------------]41.11%   8.0MB/19.5MB (1.8MB/s) Strelka_cNF98.4d.vep.maf.synapse_download_102758865     
Downloading  [################----]82.22%   16.0MB/19.5MB (3.5MB/s) Strelka_cNF98.4d.vep.maf.synapse_download_102758865     
Downloading  [####################]100.00%   19.5MB/19.5MB (4.3MB/s) Strelka_cNF98.4d.vep.maf.synapse_download_102758865 Done...    
Downloading  [#########-----------]43.52%   8.0MB/18.4MB (2.0MB/s) Strelka_i28cNF.vep.maf.synapse_download_102758867     
Downloading  [#################---]87.04%   16.0MB/18.4MB (4.0MB/s) Strelka_i28cNF.vep.maf.synapse_download_102758867     
Downloading  [####################]100.00%   18.4MB/18.4MB (4.6MB/s) Strelka_i28cNF.vep.maf.synapse_download_102758867 Done...    
Downloading  [########------------]42.23%   8.0MB/18.9MB (1.8MB/s) Strelka_icNF00.10a.vep.maf.synapse_download_102758869     
Downloading  [############--------]57.77%   10.9MB/18.9MB (2.4MB/s) Strelka_icNF00.10a.vep.maf.synapse_download_102758869     
Downloading  [####################]100.00%   18.9MB/18.9MB (4.2MB/s) Strelka_icNF00.10a.vep.maf.synapse_download_102758869 Done...    
Downloading  [####----------------]18.49%   3.6MB/19.6MB (753.3kB/s) Strelka_icNF04.9a.vep.maf.synapse_download_102758870     
Downloading  [############--------]59.24%   11.6MB/19.6MB (2.4MB/s) Strelka_icNF04.9a.vep.maf.synapse_download_102758870     
Downloading  [####################]100.00%   19.6MB/19.6MB (4.0MB/s) Strelka_icNF04.9a.vep.maf.synapse_download_102758870 Done...    
Downloading  [###-----------------]13.75%   2.6MB/18.6MB (596.1kB/s) Strelka_icNF97.2a.vep.maf.synapse_download_102758872     
Downloading  [###########---------]56.88%   10.6MB/18.6MB (2.4MB/s) Strelka_icNF97.2a.vep.maf.synapse_download_102758872     
Downloading  [####################]100.00%   18.6MB/18.6MB (4.2MB/s) Strelka_icNF97.2a.vep.maf.synapse_download_102758872 Done...    
Downloading  [#########-----------]46.27%   6.9MB/14.9MB (1.9MB/s) Strelka_icNF97.2b.vep.maf.synapse_download_102758875     
Downloading  [####################]100.00%   14.9MB/14.9MB (4.0MB/s) Strelka_icNF97.2b.vep.maf.synapse_download_102758875 Done...    
Downloading  [#########-----------]45.87%   8.0MB/17.4MB (1.7MB/s) Strelka_icNF98.4c.vep.maf.synapse_download_102758877     
Downloading  [###########---------]54.13%   9.4MB/17.4MB (2.0MB/s) Strelka_icNF98.4c.vep.maf.synapse_download_102758877     
Downloading  [####################]100.00%   17.4MB/17.4MB (3.7MB/s) Strelka_icNF98.4c.vep.maf.synapse_download_102758877 Done...    
Downloading  [##########----------]51.10%   8.0MB/15.7MB (1.9MB/s) Strelka_icNF98.4d.vep.maf.synapse_download_102758879     
Downloading  [####################]100.00%   15.7MB/15.7MB (3.7MB/s) Strelka_icNF98.4d.vep.maf.synapse_download_102758879 Done...
mafs <- lapply(maf_paths, function(x){
  maftools::read.maf(x$path)
})
## -Reading
## -Validating
## -Silent variants: 17776 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 1.020s elapsed (0.896s cpu) 
## -Reading
## -Validating
## -Silent variants: 15636 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.688s elapsed (0.599s cpu) 
## -Reading
## -Validating
## -Silent variants: 16648 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.657s elapsed (0.563s cpu) 
## -Reading
## -Validating
## -Silent variants: 19848 
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.833s elapsed (0.687s cpu) 
## -Reading
## -Validating
## -Silent variants: 17921 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.791s elapsed (0.666s cpu) 
## -Reading
## -Validating
## -Silent variants: 18138 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.473s elapsed (0.378s cpu) 
## -Reading
## -Validating
## -Silent variants: 20608 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.611s elapsed (0.488s cpu) 
## -Reading
## -Validating
## -Silent variants: 19699 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.877s elapsed (0.746s cpu) 
## -Reading
## -Validating
## -Silent variants: 20231 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.488s elapsed (0.386s cpu) 
## -Reading
## -Validating
## -Silent variants: 21032 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.581s elapsed (0.432s cpu) 
## -Reading
## -Validating
## -Silent variants: 19790 
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.810s elapsed (0.699s cpu) 
## -Reading
## -Validating
## -Silent variants: 15573 
## -Summarizing
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.560s elapsed (0.380s cpu) 
## -Reading
## -Validating
## -Silent variants: 18430 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.728s elapsed (0.628s cpu) 
## -Reading
## -Validating
## -Silent variants: 16403 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 0.421s elapsed (0.388s cpu)
merged_maf <- merge_mafs(mafs)
## Merging 14 MAF objects
## -Validating
## -Silent variants: 257733 
## -Summarizing
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
## -Finished in 2.580s elapsed (2.348s cpu)

2 Compare mafs

First, let’s compare the immortalized vs primary cells to see if there are any significant differentially mutated genes. There do not appear to be (lowest p-value ~0.1 and all p-values = 1 after FDR correction). With more cell lines, we might be able to detect a difference here, but with the current experiment we don’t detect any significantly differentially mutated genes.

primary_mafs <- subsetMaf(merged_maf, tsb = c("28cNF","cNF00.10a","cNF04.9a","cNF97.2a","cNF97.2b","cNF98.4c","cNF98.4d"))
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
immortalized_mafs <- subsetMaf(merged_maf, tsb = c("i28cNF","icNF00.10a","icNF04.9a","icNF97.2a","icNF97.2b","icNF98.4c","icNF98.4d"))
## --Possible FLAGS among top ten genes:
##   AHNAK2
##   MUC16
## -Processing clinical data
compare_mafs <- mafCompare(primary_mafs,immortalized_mafs, m1Name = "Primary", m2Name = "Immortalized", minMut = 0)

compare_mafs[1]
## $results
##       Hugo_Symbol Primary Immortalized      pval        or      ci.up
##    1:      NUP153       5            1 0.1025641 11.717446 817.982011
##    2:        TAF4       5            1 0.1025641 11.717446 817.982011
##    3:       IFNA4       0            3 0.1923077  0.000000   2.166928
##    4:        SORD       3            0 0.1923077       Inf        Inf
##    5:       CCNA2       4            1 0.2657343  6.788517 456.287433
##   ---                                                                
## 1772:      ZNF814       7            7 1.0000000  0.000000        Inf
## 1773:      ZNF831       4            4 1.0000000  1.000000  13.029360
## 1774:       ZNF91       2            2 1.0000000  1.000000  19.262994
## 1775:      ZSWIM6       2            1 1.0000000  2.255400 164.580622
## 1776:       ZWINT       2            2 1.0000000  1.000000  19.262994
##           ci.low adjPval
##    1: 0.72033908       1
##    2: 0.72033908       1
##    3: 0.00000000       1
##    4: 0.46148290       1
##    5: 0.41998181       1
##   ---                   
## 1772: 0.00000000       1
## 1773: 0.07674974       1
## 1774: 0.05191301       1
## 1775: 0.09086265       1
## 1776: 0.05191301       1

3 Plot maf summaries

Let’s plot a summary of each maf.

lapply(mafs, function(x){
       plotmafSummary(x)
  })

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL

4 Plot maf summaries

Let’s plot a summary of each cohort.

p <- plotmafSummary(primary_mafs, showBarcodes = T, textSize = 0.5)

p
## NULL
pdf('../figures/Strelka2_primaryMafSummary.pdf')
p
## NULL
dev.off()
## quartz_off_screen 
##                 2
p <- plotmafSummary(immortalized_mafs, showBarcodes = T, textSize = 0.5)

p
## NULL
pdf('../figures/Strelka2_immortalizedMafSummary.pdf')
p
## NULL
dev.off()
## quartz_off_screen 
##                 2

4.1 Look at NF1 variants

p <- lollipopPlot2(primary_mafs, immortalized_mafs, gene = 'NF1',m1_label = 'all', m2_label = 'all',labPosAngle = 45, labPosSize = 0.75, domainLabelSize = 0.5, m1_name = "Primary", m2_name = "Immortalized")
##    HGNC    refseq.ID   protein.ID aa.length
## 1:  NF1    NM_000267    NP_000258      2818
## 2:  NF1 NM_001042492 NP_001035957      2839
##    HGNC    refseq.ID   protein.ID aa.length
## 1:  NF1    NM_000267    NP_000258      2818
## 2:  NF1 NM_001042492 NP_001035957      2839

p
## $M1
##    Domain_Start Domain_End Domain_Label Variant_Classification Conversion
## 1:           NA         NA         <NA>        Frame_Shift_Del   N78Ifs*7
## 2:           NA         NA         <NA>        Frame_Shift_Del M643Ifs*45
##    Position n_variants
## 1:       78          2
## 2:      643          1
## 
## $M2
##    Domain_Start Domain_End Domain_Label Variant_Classification   Conversion
## 1:           NA         NA         <NA>        Frame_Shift_Del     N78Ifs*7
## 2:           NA         NA         <NA>            Splice_Site X2235_splice
## 3:           NA         NA         <NA>        Frame_Shift_Del  T2464Kfs*13
##    Position n_variants
## 1:       78          1
## 2:     2235          1
## 3:     2464          1
pdf("../figures/DeepVariant_NF1_variants.pdf")
p
## $M1
##    Domain_Start Domain_End Domain_Label Variant_Classification Conversion
## 1:           NA         NA         <NA>        Frame_Shift_Del   N78Ifs*7
## 2:           NA         NA         <NA>        Frame_Shift_Del M643Ifs*45
##    Position n_variants
## 1:       78          2
## 2:      643          1
## 
## $M2
##    Domain_Start Domain_End Domain_Label Variant_Classification   Conversion
## 1:           NA         NA         <NA>        Frame_Shift_Del     N78Ifs*7
## 2:           NA         NA         <NA>            Splice_Site X2235_splice
## 3:           NA         NA         <NA>        Frame_Shift_Del  T2464Kfs*13
##    Position n_variants
## 1:       78          1
## 2:     2235          1
## 3:     2464          1
dev.off()
## quartz_off_screen 
##                 2

4.2 Table of NF1 variants

merged_maf@data %>% 
  filter(Hugo_Symbol == "NF1") %>% 
  select(Tumor_Sample_Barcode, Variant_Classification, Variant_Type, 
         HGVSc, HGVSp_Short,
         dbSNP_RS, SIFT, PolyPhen, IMPACT) %>% 
  set_names(c("Cell Line", "Variant Classification", "Variant Type",
          "Genetic Change", "Protein Change", "dbSNP ID", "SIFT", "PolyPhen", "IMPACT")) %>% 
  write_csv("../figures/Strelka2_NF1_variants.csv")
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] synapser_0.15.35 forcats_0.5.2    stringr_1.4.1    dplyr_1.0.10    
##  [5] purrr_0.3.4      readr_2.1.3      tidyr_1.2.1      tibble_3.1.8    
##  [9] ggplot2_3.3.6    tidyverse_1.3.2  maftools_2.12.0 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4             sass_0.4.2             bit64_4.0.5           
##  [4] vroom_1.6.0            jsonlite_1.8.2         splines_4.2.1         
##  [7] pack_0.1-1             modelr_0.1.9           bslib_0.4.0           
## [10] assertthat_0.2.1       highr_0.9              googlesheets4_1.0.1   
## [13] cellranger_1.1.0       yaml_2.3.5             pillar_1.8.1          
## [16] backports_1.4.1        lattice_0.20-45        glue_1.6.2            
## [19] digest_0.6.29          RColorBrewer_1.1-3     rvest_1.0.3           
## [22] colorspace_2.0-3       htmltools_0.5.3        Matrix_1.5-1          
## [25] pkgconfig_2.0.3        PythonEmbedInR_0.12.79 broom_1.0.1           
## [28] haven_2.5.1            scales_1.2.1           tzdb_0.3.0            
## [31] googledrive_2.0.0      generics_0.1.3         ellipsis_0.3.2        
## [34] cachem_1.0.6           withr_2.5.0            cli_3.4.1             
## [37] survival_3.4-0         magrittr_2.0.3         crayon_1.5.2          
## [40] readxl_1.4.1           evaluate_0.16          fs_1.5.2              
## [43] fansi_1.0.3            xml2_1.3.3             tools_4.2.1           
## [46] data.table_1.14.2      hms_1.1.2              gargle_1.2.1          
## [49] lifecycle_1.0.2        munsell_0.5.0          reprex_2.0.2          
## [52] compiler_4.2.1         jquerylib_0.1.4        rlang_1.0.6           
## [55] grid_4.2.1             rstudioapi_0.14        rmarkdown_2.16        
## [58] DNAcopy_1.70.0         gtable_0.3.1           codetools_0.2-18      
## [61] DBI_1.1.3              R6_2.5.1               lubridate_1.8.0       
## [64] knitr_1.40             bit_4.0.4              fastmap_1.1.0         
## [67] utf8_1.2.2             stringi_1.7.8          parallel_4.2.1        
## [70] vctrs_0.4.2            dbplyr_2.2.1           tidyselect_1.1.2      
## [73] xfun_0.33